//////////////////////////////////////////////////////
//////////////////////////////////////////////////////
//// This program reproduces the following tables ////
//// and figures from Duan et al. (2020):         ////
//// TABLES: 1,2,3,4                              ////
//// FIGURES: 1                                   ////
//////////////////////////////////////////////////////
//////////////////////////////////////////////////////

clear
set more off
set type double
graph drop _all
program drop _all
set scheme s2mono

cd "\\file\Usersw$\wrr15\Home\My Documents\My Files\UCMETA\UC META-ANALYSIS WORKSHOP (March 2022)"
//log using "Part1 Results"
use "meta-orig.dta", clear

******************************************************************************
*DEFINITION OF BASIC VARIABLES
******************************************************************************
gen tstat = t
gen df = samplesize - k
gen pcc = tstat/sqrt(tstat^2+df)
gen varpcc = (1-pcc^2)/df
gen sepcc = sqrt(varpcc)

*--------------*
*   TABLE 2    *
*--------------*

******************************************************************************
* Distribution of t-stats and PCCs before kicking out outliers
******************************************************************************
summ tstat, detail
summ df, detail 
summ pcc, detail

******************************************************************************
* Kick out extreme values of PCC
******************************************************************************
quietly summ pcc, detail
keep if pcc > r(p1) & pcc < r(p99) 

summ tstat, detail
summ df, detail 
summ pcc, detail

******************************************************************************
// Alternatively, one could winsorize the extreme values
// Check out the user-written command winsor2
// ssc install winsor2
******************************************************************************


******************************************************************************
* Characteristics of t-stats and PCCs of final data set
******************************************************************************
*--------------*
*   FIGURE 1   *
*--------------*
histogram tstat, bin(80) name(histTSTAT)
gen tstat1 = (tstat<-2)
gen tstat2 = (tstat>=-2 & tstat<=2)
gen tstat3 = (tstat>2)
// NOTE: A lot of insignificant t-stats
sum tstat1 tstat2 tstat3

histogram pcc, bin(80) name(histPCC)

******************************************************************************
// Funnel Plot
// Note: You will have to install the metafunnel command
// metafunnel pcc sepcc
******************************************************************************

******************************************************************************
* Comparison of alternative "Fixed Effects" estimators
******************************************************************************
meta set pcc sepcc , fixed
// Standard FE estimate
meta regress _cons
// Can get the same estimate doing the following
gen ones = 1
vwls pcc ones ,  noconstant sd(sepcc)
*----------------------------*
*   TABLE 4 (Panel A, FE1)   *
*----------------------------*
gen FEweight1 = 1/(sepcc^2)
regress pcc [aweight = FEweight1], vce(cluster idstudy)
// The above is Fixed Effects-WLS
// Note coefficents are the same, but standard errors are different, both
// because of WLS and cluster robust standard errors

******************************************************************************
* Comparison of alternative "Random Effects" estimators
******************************************************************************
meta set pcc sepcc , random
// Standard FE estimate
meta regress _cons
scalar tau2 = e(tau2)
scalar list tau2 
// Can get the same estimate doing the following
gen seRE = sqrt(sepcc^2+tau2)
vwls pcc ones ,  noconstant sd(seRE)
*----------------------------*
*   TABLE 4 (Panel A, RE1)   *
*----------------------------*
gen REweight1 = 1/(seRE^2)
regress pcc [aweight = REweight1], vce(cluster idstudy)
// And this is Random Effects-WLS
// Again, coefficents are the same, but standard errors are different

******************************************************************************
* Determining the weight of individual studies (Fixed Effects)
******************************************************************************
preserve
// Three studies account for approximately 79% of the weight
// As a result, prefer Random Effects in subsequent analysis
gen pccFE = pcc*FEweight1
collapse (sum) pccFE FEweight1, by (idstudy)
gen avgpccFE = pccFE/FEweight1
gen avgsepccFE = sqrt(1/FEweight1)
sort idstudy
metan avgpccFE avgsepccFE, fixed lcols(idstudy) nobox
gen studyweight = _WT
*--------------------------*
*   TABLE 3 (FE column)    *
*--------------------------*
summ studyweight, detail
gsort -studyweight
drop if idstudy > 20
// I only show you this so you can see a forrest plot
metan avgpccFE avgsepccFE, fixed lcols(idstudy) nobox
graph save forrestFE, replace 
restore

******************************************************************************
* Determining the weight of individual studies (Random Effects)
******************************************************************************
preserve
gen pccRE = pcc*REweight1
collapse (sum) pccRE REweight1, by (idstudy)
gen avgpccRE = pccRE/REweight1
gen avgsepccRE = sqrt(1/REweight1)
sort id
metan avgpccRE avgsepccRE, random lcols(idstudy) nobox
gen studyweight = _WT
*--------------------------*
*   TABLE 3 (RE column)    *
*--------------------------*
summ studyweight, detail
gsort -studyweight
drop if idstudy > 20
// I only show you this so you can see a forrest plot
metan avgpccRE avgsepccRE, random lcols(id) nobox
graph save forrestRE, replace 
restore

******************************************************************************
* Number of estimates per study
******************************************************************************
bysort idstudy: egen numberests = count(coefficient)
bysort idstudy: gen estnumber = _n
summ numberests if estnumber == 1, detail
histogram numberests if estnumber == 1, bin(30) frequency

******************************************************************************
* Weighting by Number of estimates per study + stderror
******************************************************************************
*----------------------------*
*   TABLE 4 (Panel A, FE2)   *
*----------------------------*
gen FEweight2 = 1/(numberests*sepcc^2)
regress pcc [aweight = FEweight2], vce(cluster idstudy)
*----------------------------*
*   TABLE 4 (Panel A, RE2)   *
*----------------------------*
gen REweight2 = 1/(numberests*seRE^2)
regress pcc [aweight = REweight2], vce(cluster idstudy)

******************************************************************************
* Egger regressions/FAT-PET regressions
******************************************************************************
*----------------------------*
*   TABLE 4 (Panel B, FE1)   *
*----------------------------*
regress pcc sepcc [aweight = FEweight1], vce(cluster idstudy)
*----------------------------*
*   TABLE 4 (Panel B, FE2)   *
*----------------------------*
regress pcc sepcc [aweight = FEweight2], vce(cluster idstudy)
*----------------------------*
*   TABLE 4 (Panel B, RE1)   *
*----------------------------*
regress pcc sepcc [aweight = REweight1], vce(cluster idstudy)
*----------------------------*
*   TABLE 4 (Panel B, RE2)   *
*----------------------------*
regress pcc sepcc [aweight = REweight2], vce(cluster idstudy)


******************************************************************************
* Characteristics of the data
******************************************************************************

rename spill_other_firm all
rename idstudy id
rename sameregion spill_reg
rename sameindustry spill_ind
rename spill_exporter spill_ex
rename spill_value value
rename spill_employment employment
rename spill_output output
rename spill_number number
rename spill_asset asset
rename spill_rd rdsp
rename spill_other other_ms
rename olsgls ogls
rename probit plt
rename nonspherical nsph
rename endogeneity endg
rename categorical catg
rename sampleselection sps
rename panel_fe fe
rename aggregate agg
rename crosssection cs
rename labourquality labq
rename productivity prod
rename publicationyear pubyear

replace all = 1 if foreign ==1
drop foreign
rename all nondomestic
replace other_ms = 1 if asset==1
drop asset
replace impact = 0 if impact ==.
gen firm_level = 1 if agg == 0
replace firm_level = 0 if agg == 1

*-------------*
*   TABLE 1   *
*-------------*

// Spillover mechanisms
summ spill_ex spill_reg spill_ind spill_fdi 

// Spillover measures
summ number value employment output rdsp other_ms

// Dependent variable
summ binary 

// Data characteristics
gen avgyear=(end+start)/2
summ firm_level domestic avgyear 

// Countries
summ oecd eu developing china

// Industry group
summ manufacturing service it food other_industry

// Estimation type
gen other_est = 1 - ogls - plt
summ plt ogls other_est

// Estimation type - endogeneity
//gen endg_sps = (endg == 1 & sps == 1)
summ endg sps fe 
//drop endg_sps

// Control variables
summ size prod labq capital rd

// Study characteristics
summ published impact citation

// log close